

<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  <title>Noun Phrase to Vec &mdash; NLP Architect by Intel® AI Lab 0.5.2 documentation</title>
  

  
  
  
  

  
  <!-- Script includes. Apart from MathJax (marked async), none of these carry
       defer/async, so they load and run in document order. The inline script
       at the end of <body> calls jQuery(...) and SphinxRtdTheme.Navigation,
       so jquery.js must already have run by then; SphinxRtdTheme is presumably
       defined by _static/js/theme.js (TODO confirm). -->
  <script type="text/javascript" src="_static/js/modernizr.min.js"></script>
  
    
      <script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
        <script type="text/javascript" src="_static/jquery.js"></script>
        <script type="text/javascript" src="_static/underscore.js"></script>
        <script type="text/javascript" src="_static/doctools.js"></script>
        <script type="text/javascript" src="_static/language_data.js"></script>
        <script type="text/javascript" src="_static/install.js"></script>
        <!-- MathJax is fetched from a CDN and marked async, so it does not block parsing. -->
        <script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
    
    <script type="text/javascript" src="_static/js/theme.js"></script>

    

  
  <link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
  <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
  <link rel="stylesheet" href="_static/nlp_arch_theme.css" type="text/css" />
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto+Mono" type="text/css" />
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Open+Sans:100,900" type="text/css" />
    <link rel="index" title="Index" href="genindex.html" />
    <link rel="search" title="Search" href="search.html" /> 
</head>

<body class="wy-body-for-nav">

   
  <div class="wy-grid-for-nav">
    
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search" >
          

          
            <!-- Home link. The logo image is the link's only content, so its alt
                 text names the link destination instead of just saying "Logo". -->
            <a href="index.html">
              <img src="_static/logo.png" class="logo" alt="NLP Architect by Intel® AI Lab documentation home"/>
            </a>

          

          
<!-- Sidebar search: submits the query to search.html with GET. -->
<div role="search">
  <form id="rtd-search-form" class="wy-form" action="search.html" method="get">
    <!-- A placeholder is not an accessible name; aria-label gives the field one
         without adding any visible markup. -->
    <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>

          
        </div>

        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          
            
            
              
            
            
              <ul>
<li class="toctree-l1"><a class="reference internal" href="quick_start.html">Quick start</a></li>
<li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="publications.html">Publications</a></li>
<li class="toctree-l1"><a class="reference internal" href="tutorials.html">Jupyter Tutorials</a></li>
<li class="toctree-l1"><a class="reference internal" href="model_zoo.html">Model Zoo</a></li>
</ul>
<p class="caption"><span class="caption-text">NLP/NLU Models</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="tagging/sequence_tagging.html">Sequence Tagging</a></li>
<li class="toctree-l1"><a class="reference internal" href="sentiment.html">Sentiment Analysis</a></li>
<li class="toctree-l1"><a class="reference internal" href="bist_parser.html">Dependency Parsing</a></li>
<li class="toctree-l1"><a class="reference internal" href="intent.html">Intent Extraction</a></li>
<li class="toctree-l1"><a class="reference internal" href="lm.html">Language Models</a></li>
<li class="toctree-l1"><a class="reference internal" href="information_extraction.html">Information Extraction</a></li>
<li class="toctree-l1"><a class="reference internal" href="transformers.html">Transformers</a></li>
<li class="toctree-l1"><a class="reference internal" href="archived/additional.html">Additional Models</a></li>
</ul>
<p class="caption"><span class="caption-text">Optimized Models</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="quantized_bert.html">Quantized BERT</a></li>
<li class="toctree-l1"><a class="reference internal" href="transformers_distillation.html">Transformers Distillation</a></li>
<li class="toctree-l1"><a class="reference internal" href="sparse_gnmt.html">Sparse Neural Machine Translation</a></li>
</ul>
<p class="caption"><span class="caption-text">Solutions</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="absa_solution.html">Aspect Based Sentiment Analysis</a></li>
<li class="toctree-l1"><a class="reference internal" href="term_set_expansion.html">Set Expansion</a></li>
<li class="toctree-l1"><a class="reference internal" href="trend_analysis.html">Trend Analysis</a></li>
</ul>
<p class="caption"><span class="caption-text">For Developers</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="generated_api/nlp_architect_api_index.html">nlp_architect API</a></li>
<li class="toctree-l1"><a class="reference internal" href="developer_guide.html">Developer Guide</a></li>
</ul>

            
          
        </div>
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">

      
      <!-- Top navigation bar: a bars icon carrying data-toggle="wy-nav-top" and a
           link back to index.html. -->
      <nav class="wy-nav-top" aria-label="top navigation">
        
          <!-- NOTE(review): this <i> has no text, role or aria-label. Presumably the
               theme script binds a click handler to it via data-toggle to open the
               sidebar; confirm, and if so consider giving it an accessible name. -->
          <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
          <a href="index.html">NLP Architect by Intel® AI Lab</a>
        
      </nav>


      <div class="wy-nav-content">
        
        <div class="rst-content">
        
          















<!-- Breadcrumb trail: "Docs" link, the current page title, then an aside slot
     that is empty on this page. A horizontal rule closes the block. -->
<div role="navigation" aria-label="breadcrumbs navigation">
  <ul class="wy-breadcrumbs">
    <li><a href="index.html">Docs</a> &raquo;</li>
    <li>Noun Phrase to Vec</li>
    <li class="wy-breadcrumbs-aside">
    </li>
  </ul>
  <hr/>
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
            
  <div class="section" id="noun-phrase-to-vec">
<h1>Noun Phrase to Vec<a class="headerlink" href="#noun-phrase-to-vec" title="Permalink to this headline">¶</a></h1>
<div class="section" id="overview">
<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
<p>Noun Phrases (NPs) play a particular role in NLP applications.
This code trains a word embedding model for NPs using the <a class="reference external" href="https://code.google.com/archive/p/word2vec/">word2vec</a> or <a class="reference external" href="https://github.com/facebookresearch/fastText">fastText</a> algorithm.
It assumes that the NPs are already extracted and marked in the input corpus.
All the terms in the corpus are used as context in order to train the word embedding model; however,
at the end of the training, only the word embeddings of the NPs are stored, except for the case of
fastText training with word_ngrams=1; in this case, we store all the word embeddings,
including those of non-NPs, in order to be able to estimate word embeddings of out-of-vocabulary NPs
(NPs that don’t appear in the training corpora).</p>
<div class="admonition note">
<p class="first admonition-title">Note</p>
<p class="last">This code can also be used to train a word embedding model on any marked corpus.
For example, if you mark verbs in your corpus, you can train a verb2vec model.</p>
</div>
<p>NPs have to be marked in the corpus by a marking character between the words of the NP and as a suffix of the NP.
For example, if the marking character is “_”, the NP “Natural Language Processing” will be marked as “Natural_Language_Processing_”.</p>
<p>We use the <a class="reference external" href="https://www.clips.uantwerpen.be/conll2000/chunking/">CoNLL-2000</a> shared task dataset in the default parameters of our example for training the
<a class="reference internal" href="generated_api/nlp_architect.models.html#nlp_architect.models.np2vec.NP2vec" title="nlp_architect.models.np2vec.NP2vec"><code class="xref py py-class docutils literal notranslate"><span class="pre">NP2vec</span></code></a> model. The terms and conditions of the data set license apply. Intel does not grant any rights to the data files.</p>
</div>
<div class="section" id="files">
<h2>Files<a class="headerlink" href="#files" title="Permalink to this headline">¶</a></h2>
<ul class="simple">
<li><a class="reference internal" href="generated_api/nlp_architect.models.html#nlp_architect.models.np2vec.NP2vec" title="nlp_architect.models.np2vec.NP2vec"><code class="xref py py-class docutils literal notranslate"><span class="pre">NP2vec</span></code></a> model training, store and load code.</li>
<li><strong>examples/np2vec/train.py</strong>: illustrates how to call <a class="reference internal" href="generated_api/nlp_architect.models.html#nlp_architect.models.np2vec.NP2vec" title="nlp_architect.models.np2vec.NP2vec"><code class="xref py py-class docutils literal notranslate"><span class="pre">NP2vec</span></code></a> training and store code.</li>
<li><strong>examples/np2vec/inference.py</strong>: illustrates how to call <a class="reference internal" href="generated_api/nlp_architect.models.html#nlp_architect.models.np2vec.NP2vec" title="nlp_architect.models.np2vec.NP2vec"><code class="xref py py-class docutils literal notranslate"><span class="pre">NP2vec</span></code></a> load code.</li>
</ul>
</div>
<div class="section" id="running-modalities">
<h2>Running Modalities<a class="headerlink" href="#running-modalities" title="Permalink to this headline">¶</a></h2>
<div class="section" id="training">
<h3>Training<a class="headerlink" href="#training" title="Permalink to this headline">¶</a></h3>
<p>To train the model with default parameters, the following command can be used:</p>
<div class="code python highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">python</span> <span class="n">examples</span><span class="o">/</span><span class="n">np2vec</span><span class="o">/</span><span class="n">train</span><span class="o">.</span><span class="n">py</span> \
  <span class="o">--</span><span class="n">corpus</span> <span class="n">sample_corpus</span><span class="o">.</span><span class="n">json</span> \
  <span class="o">--</span><span class="n">corpus_format</span> <span class="n">json</span> \
  <span class="o">--</span><span class="n">np2vec_model_file</span> <span class="n">sample_np2vec</span><span class="o">.</span><span class="n">model</span>
</pre></div>
</div>
</div>
<div class="section" id="inference">
<h3>Inference<a class="headerlink" href="#inference" title="Permalink to this headline">¶</a></h3>
<p>To run inference with a saved model, the following command can be used:</p>
<div class="code python highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">python</span> <span class="n">examples</span><span class="o">/</span><span class="n">np2vec</span><span class="o">/</span><span class="n">inference</span><span class="o">.</span><span class="n">py</span> <span class="o">--</span><span class="n">np2vec_model_file</span> <span class="n">sample_np2vec</span><span class="o">.</span><span class="n">model</span> <span class="o">--</span><span class="n">np</span> <span class="o">&lt;</span><span class="n">noun</span> <span class="n">phrase</span><span class="o">&gt;</span>
</pre></div>
</div>
<p>More details about the hyperparameters are available at <a class="reference external" href="https://radimrehurek.com/gensim/models/word2vec.html#gensim.models.word2vec.Word2Vec">https://radimrehurek.com/gensim/models/word2vec.html#gensim.models.word2vec.Word2Vec</a> for word2vec and <a class="reference external" href="https://radimrehurek.com/gensim/models/fasttext.html#gensim.models.fasttext.FastText">https://radimrehurek.com/gensim/models/fasttext.html#gensim.models.fasttext.FastText</a> for fastText.</p>
</div>
</div>
</div>


           </div>
           
          </div>
          <!-- Page footer: an empty contentinfo paragraph followed by the build
               credits. The Sphinx credit links to the HTTPS site rather than
               plain HTTP. -->
          <footer>
  

  <hr/>

  <div role="contentinfo">
    <p>

    </p>
  </div>
  Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. 

</footer>

        </div>
      </div>

    </section>

  </div>
  


  <script type="text/javascript">
      // Once the DOM is ready, call the theme's Navigation.enable(true).
      jQuery(document).ready(function () {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script>

  
  
    
   

</body>
</html>